# Dickstein, Ho, and Mark (2023)
# This file defines the demand specification and runs the demand estimation routine.

# Preliminaries
setwd("../../../../library")
source("PreliminariesCode.R")

# Creating modified exploded dataset
ravec <- 1:7
yrvec <- 2014:2016

## Loading cleaning function: 
source(paste0(project_folder, "/library/DA01QuanModifyExplodedDFunc.R"))

## Loading data
explodeddata <- fread(paste0(project_folder, "/data/explodeddata.csv"))

## Take a sample of hhids and save that sample:
explodeddata[, sample_id := .GRP, by = c("hhid", "year")]
sample_in_vec <- sample.int(max(explodeddata$sample_id), ceiling(.8 * max(explodeddata$sample_id)))
fwrite(
  explodeddata[!(sample_id %in% sample_in_vec), -"sample_id", with = F],
  paste0(project_folder, "/data/explodeddata_leftoutsample.csv"))
explodeddata <- explodeddata[(sample_id %in% sample_in_vec), -"sample_id", with = F]

## Putting the data in the right format for demand estimation
acgquinsvec <- as.numeric(scan(file = paste0(project_folder, "/data/orig/acg_positive_quintiles"), sep = ",", what = "character")[2:7])
print(paste0("Modifying... ravec: ", paste(ravec, collapse = " and "), "yrvec: ", paste(yrvec, collapse = " and ")))
data <- mod_exploded_data(
  explodeddata, 
  paste0(project_folder, "/data"),
  highmeanthresh = 0.59616,
  lowsumthresh = 0.2575,
  highsumthresh = 1.76329,
  highmaxthresh = 3.37157,
  medincome = 2.5,
  acgquins = acgquinsvec,
  winsorbound = 89.3707340916662,
  ravec = ravec,
  yrvec = yrvec)

# Setting options
specs_location <- paste0(project_folder, "/analysis/estimationcode/specs/lowrisk_simplemh_censor0.2_80sample_main")
specs_name <- "lowrisk_simplemh_censor0.2_80sample_main"
testing <- T
output_path <- paste0(specs_location, "/output")
est_type <- "full_only"
fixed_cost_censor <- 0.2
howmuchest <- "both" # can be "both", "estimation", "stderr"
sumrisk_cutoff <- 70
check_derivatives <- FALSE

# Creating output folder
dir.create(output_path, recursive = T)
f <- list.files(output_path, include.dirs = F, full.names = T)
file.remove(f)

# Set/determine variables
if(est_type == "switcher_only"){
  data <- data[switcher == 1]
}
not_zero <- function(x) ifelse(is.numeric(x), sum(x, na.rm = T) != 0, F)
nums <- unlist(lapply(data, not_zero))
payervars <- grep("PAYER_ID_", names(data[, ..nums]), value=TRUE)
var_names <- list(
  W1_names = c("low_sum_risk", "sum_concurrent_risk",
               "withkids", "over50"),
  W2_names = c("cons"),
  W3_names = c("cons"),
  Z_names  = c(payervars, "silver_costshare")
)
var_switch <- list(
  W1_names = rep(0, length(var_names$W1_names)),
  W2_names = rep(0, length(var_names$W2_names)),
  W3_names = rep(0, length(var_names$W3_names)),
  Z_names = rep(0, length(var_names$Z_names))
)

# Saving Observation Level Data For summary statistics - 80sample: comment out 
##obs_level_data <- data %>% filter(choice == 1)
##save(obs_level_data, file=paste0(output_path, "/obs_level_data.rda"))
##write.dta(obs_level_data, file=paste0(output_path, "/obs_level_data.dta"))

# Calling estimate demand!
if(howmuchest %in% c("both", "estimation")){
  source(paste0(specs_location, "/../../estimatedemand.R"))
}

## Loading functions
source(paste0(specs_location, "/../../get_llh.R"))
source(paste0(specs_location, "/../../parametrize_choice_data.R"))
source(paste0(specs_location, "/../../parametrize_cost_data.R"))
source(paste0(specs_location, "/../../generatestderrors.R"))

## Adjusted data to account for specification type (e.g. switcher)
if(howmuchest %in% c("stderr")){
  source(paste0(specs_location, "/../../adjustfortype.R"))
}

## Estimating standard errors
if(howmuchest %in% c("both", "stderr")){
  data <- as.data.frame(data)
  get_diags(data, output_path = output_path)
  get_se(data, output_path = output_path, cost_censor = fixed_cost_censor)
}
